library(magrittr)
library(tidyverse)
library(Seurat)
library(readxl)
library(cowplot)
library(colorblindr)
library(viridis)
library(progeny)
library(destiny)

coi <- params$cell_type_super
cell_sort <- params$cell_sort
cell_type_major <- params$cell_type_major
louvain_resolution <- params$louvain_resolution
louvain_cluster <- params$louvain_cluster

1 Cluster markers

1.1 Major T.super markers for cell assign

### load all data ---------------------------------
source("_src/global_vars.R")

# seu_obj <- read_rds(paste0("/work/shah/isabl_data_lake/analyses/16/52/1652/celltypes/", coi, "_processed.rds"))
seu_obj <- read_rds(paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/outs_pre/", coi, "_seurat_", louvain_resolution, ".rds"))
# seu_obj <- read_rds(paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_highqc.rds"))

myfeatures <- c("umapharmony_1", "umapharmony_2", "sample", louvain_cluster, "doublet", "nCount_RNA", "nFeature_RNA", "percent.mt", "doublet_score")

plot_data_wrapper <- function(cluster_res) {
  cluster_res <- enquo(cluster_res)
  as_tibble(FetchData(seu_obj, myfeatures)) %>% 
    left_join(meta_tbl, by = "sample") %>% 
    rename(cluster = !!cluster_res) %>% 
    mutate(cluster = as.character(cluster),
           tumor_supersite = ordered(tumor_supersite, levels = names(clrs$tumor_supersite)))
}

plot_data <- plot_data_wrapper(louvain_cluster)

1.2 Subtype currated markers

helper_f <- function(x) ifelse(is.na(x), "", x)

markers_v6_super[[coi]] %>% 
  group_by(subtype) %>% 
  mutate(rank = row_number(gene)) %>% 
  spread(subtype, gene) %>% 
  mutate_all(.funs = helper_f) %>% 
  formattable::formattable()
rank CD4.T.CXCL13 CD4.T.naive CD4.T.reg CD8.T.CXCL13 CD8.T.cytotoxic CD8.T.ISG Cycling.T.NK dissociated NK.CD56 NK.cytotoxic NK.naive
1 CD4 CCR7 CD4 CCL3 CCL4 IFI6 ASPM BTG1 AREG FCGR3A CCR6
2 CD40LG CD4 FOXP3 CCL4L2 CCL5 IFIT1 CENPF DNAJB1 FCER1G FGFBP2 IL4I1
3 CTLA4 CD40LG IL2RA CD8A CD8A IFIT2 HIST1H4C DUSP1 GNLY KLRD1 IL7R
4 CXCL13 IL7R TNFRSF4 CRTAM CD8B IFIT3 HMGB2 EGR1 KLRC1 KLRF1 KLRB1
5 FKBP5 KLF2 TRAC CXCL13 CRTAM ISG15 MKI67 FOS KRT81 PRF1 LST1
6 IL6ST LTB FABP5 CST7 MX1 STMN1 FOSB TRDC SPON2 LTB
7 ITM2A TCF7 GZMB DTHD1 MX2 TOP2A HSPA1A TYROBP TNFSF13B
8 MAF TPT1 HAVCR2 GZMA RSAD2 TUBA1B HSPA1B XCL1
9 NMB IFNG GZMH TUBB HSPA6 XCL2
10 NR3C1 LAG3 GZMK TYMS JUN
11 PDCD1 MIR155HG GZMM JUNB
12 TNFRSF4 PHLDA1 HLA-DPB1 KLF2
13 TOX2 PTMS ITM2C MT1E
14 TSHZ2 RBPJ KLRG1 MT1X
15 TNFRSF9 TRGC2

1.3 Subtype cluster markers

# marker_tbl <- read_tsv(paste0("/work/shah/isabl_data_lake/analyses/16/52/1652/celltypes/", coi, "_markers.tsv")) %>% 
#   filter(resolution == louvain_resolution)
marker_tbl <- read_tsv(paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/outs_pre/", coi, "_markers_", louvain_resolution, ".tsv"))
# marker_tbl <- read_tsv(paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_highqc_markers_02.tsv"))

## Hypergeometric test --------------------------------------

test_set <- marker_tbl %>% 
  group_by(cluster) %>% 
  filter(!str_detect(gene, "^RPS|^RPL")) %>% 
  slice(1:50) %>% 
  mutate(k = length(cluster)) %>% 
  ungroup %>%
  select(cluster, gene, k) %>% 
  mutate(join_helper = 1) %>% 
  group_by(cluster, join_helper, k) %>%
  nest(test_set = gene)

markers_doub_tbl <- markers_v6 %>% 
  enframe("subtype", "gene") %>% 
  filter(!(subtype %in% unique(c(coi, cell_type_major)))) %>% 
  unnest(gene) %>% 
  group_by(gene) %>% 
  filter(length(gene) == 1) %>% 
  mutate(subtype = paste0("doublet.", subtype)) %>% 
  bind_rows(tibble(subtype = "Mito.high", gene = grep("^MT-", rownames(seu_obj), value = T)))

ref_set <- markers_v6_super[[coi]] %>% 
  bind_rows(markers_doub_tbl) %>% 
  group_by(subtype) %>% 
  mutate(m = length(gene),
         n = length(rownames(seu_obj))-m,
         join_helper = 1) %>% 
  group_by(subtype, m, n, join_helper) %>%
  nest(ref_set = gene)

hyper_tbl <- test_set %>% 
  left_join(ref_set, by = "join_helper") %>% 
  group_by(cluster, subtype, m, n, k) %>%
  do(q = length(intersect(unlist(.$ref_set), unlist(.$test_set)))) %>%
  mutate(pval = 1-phyper(q = q, m = m, n = n, k = k)) %>%
  ungroup %>%
  mutate(qval = p.adjust(pval, "BH"),
         sig = qval < 0.01)

# hyper_tbl %>% 
#   group_by(subtype) %>% 
#   filter(any(qval < 0.01)) %>%
#   ggplot(aes(subtype, -log10(qval), fill = sig)) +
#   geom_bar(stat = "identity") +
#   facet_wrap(~cluster) +
#   coord_flip()
  
low_rank <- str_detect(unique(hyper_tbl$subtype), "doublet|dissociated")
subtype_lvl <- c(sort(unique(hyper_tbl$subtype)[!low_rank]), sort(unique(hyper_tbl$subtype)[low_rank]))
  
cluster_label_tbl <- hyper_tbl %>% 
  mutate(subtype = ordered(subtype, levels = subtype_lvl)) %>% 
  arrange(qval, subtype) %>%
  group_by(cluster) %>% 
  slice(1) %>% 
  mutate(subtype = ifelse(sig, as.character(subtype), paste0("unknown_", cluster))) %>% 
  select(cluster, cluster_label = subtype) %>% 
  ungroup %>% 
  mutate(cluster_label = make.unique(cluster_label, sep = "_"))

seu_obj$cluster_label <- unname(deframe(cluster_label_tbl)[as.character(unlist(seu_obj[[paste0("RNA_snn_res.", louvain_resolution)]]))])
plot_data$cluster_label <- seu_obj$cluster_label

cluster_n_tbl <- seu_obj$cluster_label %>% 
  table() %>% 
  enframe("cluster_label", "cluster_n") %>% 
  mutate(cluster_nrel = cluster_n/sum(cluster_n))

marker_sheet <- marker_tbl %>% 
  left_join(cluster_label_tbl, by = "cluster") %>% 
  group_by(cluster_label) %>% 
  filter(!str_detect(gene, "^RPS|^RPL")) %>% 
  slice(1:50) %>% 
  mutate(rank = row_number(-avg_logFC)) %>% 
  select(cluster_label, gene, rank) %>% 
  ungroup %>% 
  mutate(cluster_label = ordered(cluster_label, levels = unique(c(names(clrs$cluster_label[[coi]]), sort(cluster_label))))) %>% 
  spread(cluster_label, gene) %>% 
  mutate_all(.funs = helper_f)

marker_tbl_annotated <- marker_tbl %>% 
  left_join(cluster_label_tbl, by = "cluster") %>% 
  left_join(cluster_n_tbl, by = "cluster_label") %>% 
  select(-cluster, -resolution) %>% 
  mutate(cluster_label = ordered(cluster_label, levels = unique(c(names(clrs$cluster_label[[coi]]), sort(cluster_label))))) %>% 
  arrange(cluster_label, -avg_logFC, p_val_adj)

write_tsv(marker_sheet, paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_marker_sheet.tsv"))

write_tsv(marker_sheet, paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/supplementary_tables/", coi, "_marker_sheet.tsv"))

write_tsv(marker_tbl_annotated, paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_marker_table_annotated.tsv"))

write_tsv(marker_tbl_annotated, paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/supplementary_tables/", coi, "_marker_table_annotated.tsv"))

formattable::formattable(marker_sheet)
rank CD4.T.naive CD4.T.CXCL13 CD4.T.reg CD8.T.cytotoxic CD8.T.ISG CD8.T.CXCL13 NK.naive NK.CD56 NK.cytotoxic Cycling.T.NK Cycling.T.NK_1 dissociated dissociated_1 dissociated_2 doublet.Fibroblast doublet.Fibroblast_1 doublet.Monocyte doublet.Plasma.cell
1 IL7R CXCL13 TNFRSF4 GZMK IFIT3 CXCL13 KLRB1 GNLY FGFBP2 STMN1 CENPF CCL4L2 HSPA1A KLF2 DCN IGFBP5 HLA-DRA SOX4
2 CCR7 NMB IL2RA CD8A ISG15 GZMB IL4I1 TYROBP FCGR3A TUBA1B ASPM CCL4 HSPA1B CCR7 IGFBP5 TAGLN CST3 PTCRA
3 KLF2 NR3C1 FOXP3 CD8B MX1 CCL4L2 IL7R AREG SPON2 MKI67 MKI67 IFNG MT1X JUNB RBP1 ADIRF CXCL8 MAL
4 EEF1B2 MAF CTLA4 ITM2C IFIT1 MIR155HG LTB KLRC1 PRF1 HIST1H4C HMGB2 FOS DNAJB1 FOS C7 DCN SPP1 MZB1
5 TPT1 FKBP5 LTB GZMH RSAD2 TNFRSF9 LST1 FCER1G KLRF1 TUBB TOP2A FOSB MT1E DUSP1 MEG3 IGFBP7 S100A9 DNTT
6 EEF1A1 IL6ST RTKN2 CCL5 IFIT2 HAVCR2 TNFSF13B TRDC GNLY TOP2A UBE2C TNF HSPA6 SELL CALD1 SPARCL1 S100A8 TFDP2
7 MAL ITM2A BATF TRGC2 IFI6 RBPJ CCR6 XCL1 KLRD1 TYMS CCNB1 JUN HSP90AA1 IL7R IGFBP4 CALD1 HLA-DQA1 CD1E
8 TCF7 TSHZ2 TNFRSF18 KLRG1 MX2 LAG3 CTSH KLRD1 NKG7 CENPF UBE2S EGR1 HSPH1 AREG RARRES2 MGP CD74 STMN1
9 CD40LG CTLA4 SAT1 GZMA IFI44L IFNG AQP3 KRT81 CX3CR1 HMGB2 PTTG1 CCL3L1 HSPE1 EEF1A1 NR2F2 C11orf96 LYZ ARPP21
10 SELL CD40LG TBC1D4 CCL4 ISG20 CCL3 CCL20 XCL2 GZMB ASPM TPX2 NFKBID HSPD1 CD69 SELENOP MYL9 FTL AC084033.3
11 GPR183 PDCD1 TIGIT CRTAM HERC5 PTMS NFKBIA IGFBP2 PLAC8 NUSAP1 STMN1 CD69 HSPB1 BTG2 MDK TIMP3 APOE CDK6
12 LDHB CD4 GADD45A CST7 SAMD9L CD8A RORA CLIC3 CLIC3 PCLAF KPNA2 NR4A2 HSP90AB1 GPR183 SOX4 MDK HLA-DQB1 MAP1A
13 NOSIP LIMS1 TNFRSF1B GZMM OAS1 CRTAM TNFRSF25 IL2RB PLEK HMGN2 CENPE AC020916.1 DNAJA1 PIK3IP1 ADIRF TPM2 MARCKS AC011893.1
14 SNHG8 TNFRSF4 PMAIP1 HLA-DPB1 TNFSF10 PHLDA1 CEBPD CEBPD TYROBP H2AFZ CKS2 EGR2 JUN CD55 MGP NR2F2 AIF1 GLUL
15 PABPC1 RNF19A UGP2 DTHD1 STAT1 FABP5 TNFAIP3 KRT86 PTGDS PCNA TUBA1B DUSP1 CACYBP DNAJB1 EGR1 FBLN1 C1QB ADA
16 NOP53 CORO1B IKZF2 PPP1R14B EIF2AK2 TIGIT NCR3 TXK EFHD2 HIST1H1B HMMR TNFSF9 HSPA8 FKBP5 STAR IGF1 FTH1 AL357060.1
17 LEF1 RBPJ TNFRSF9 CD3G OAS3 KRT86 SLC4A10 CTSW FCER1G DUT DLGAP5 KLF6 FKBP4 TSC22D3 SFRP4 RAMP1 BASP1 GRASP
18 EIF3E CPM ICOS THEMIS SAMD9 JAML TMIGD2 KLRB1 CST7 CLSPN CCNB2 TNFAIP3 CHORDC1 RACK1 CLU SELENOP C1QA CD1B
19 LTB ZBED2 LINC01943 DUSP2 XAF1 CCL5 DPP4 MATK GZMH SMC4 TUBB4B DUSP2 RGS2 LDHB ADAMTS1 RARRES2 C15orf48 MIR181A1HG
20 RACK1 AC004585.1 IL32 CD3D GBP1 LINC01871 TPT1 CCL3 ADGRG1 ATAD2 NUSAP1 BTG2 FOS SARAF TIMP2 LUM FN1 CCDC26
21 NACA TOX2 SOX4 LYAR EPSTI1 CXCR6 MYBL1 CD7 CCL3 SMC2 BIRC5 IER2 ANXA1 CXCR4 TCEAL4 IGFBP6 MNDA JCHAIN
22 UBA52 DUSP4 ARID5B TC2N IFI44 TNIP3 S100A4 NKG7 HOPX TPX2 HMGN2 PPP1R15A DNAJB4 PLAC8 C1R CARMN G0S2 VIPR2
23 TOMM7 AHI1 CD27 EOMES PLSCR1 PDCD1 CD40LG CD63 IGFBP7 TMPO ARL6IP1 NR4A1 PPP1R15A EEF1B2 WFDC2 IGFBP4 APOC1 ID1
24 SOCS3 ICA1 BIRC3 CXCR6 IFI35 HLA-DRB1 SPOCK2 HOPX ZEB2 HELLS CDC20 NFKBIA ZFAND2A TCF7 FHL2 DST SOD2 SOCS2
25 JUNB ARID5B LAYN HLA-DPA1 USP18 GAPDH LINC01871 TMIGD2 PRSS23 UBE2C CDKN3 JUND FOSB TPT1 IFITM3 CAV1 NPC2 RCAN1
26 SERINC5 CD84 CORO1B HLA-DRB1 OASL FAM3C ERN1 CMC1 AKR1C3 NASP SMC4 GADD45B DNAJA4 FOSB PEG3 ID4 LST1 CLDN5
27 TMEM123 CCDC50 TYMP SLF1 MT2A CTLA4 JAML TNFRSF18 CD247 DEK CKS1B TSC22D3 DUSP1 NOSIP C1S NUPR1 GSN CASC15
28 EEF2 IGFL2 DUSP4 APOBEC3G HELZ2 SPRY1 FKBP11 KLRC2 MYBL1 RRM2 KIF20B TAGAP TSPYL2 SC5D LUM CSRP2 MS4A6A PFKFB2
29 FXYD5 RGS1 CD4 PPP2R5C TRIM22 CCND2 IFNGR1 SRGAP3 AREG CKS1B GTSE1 ZFP36 SERPINH1 NACA SERPINF1 CDKN1C IL1B GALNT2
30 TSHZ2 BATF ENTPD1 KIAA1551 CMPK2 CD63 MGAT4A GSTP1 C1orf21 KNL1 TUBA1C ZFP36L1 UBC AP3M2 CEBPD PGR PSAP HES4
31 TRABD2A SRGN CTSC F2R LY6E CD8B S100A6 LAT2 S1PR5 HMGB1 TUBB RGCC UBB BTG1 AKAP12 COL6A1 GRN MARCKSL1
32 ANK3 CH25H MIR4435-2HG SLAMF7 PARP14 GZMH ELK3 GZMB CTSW MCM7 SGO2 IL7R AHSA1 NOP53 TIMP1 COL6A2 CD83 TP53INP1
33 SARAF SPOCK2 LINC02099 CXCR4 NT5C3A ENTPD1 EEF1A1 LINC00996 ABHD17A FABP5 H2AFZ CRTAM NEU1 ZBTB16 CST3 EMX2 CTSH APBA2
34 AQP3 ZNRF1 MAGEH1 SH2D1A DDX58 SRGAP3 KIT PRF1 KLF2 UBE2S JPT1 KDM6B DNAJB6 ERAP2 NR2F1 PALLD GLUL TSHR
35 RIPOR2 CHN1 SPOCK2 FAM102A IRF7 VCAM1 LTC4S KLRF1 PTPN12 GAPDH CEP55 NR4A3 GADD45B CCND3 DLK1 PPP1R14A MEF2C NREP
36 AP3M2 TNFRSF25 PHACTR2 CD52 RNF213 ID2 RUNX2 NCAM1 TTC38 CXCL13 NUF2 ATF3 KLF6 EEF1D SERPING1 RBP1 CYBB MME
37 ZFAS1 CD200 CARD16 YBX3 DDX60L HLA-DRA RORC CXXC5 KLRB1 RANBP1 KIF14 DDX3X BTG2 PPP1R15A BEX3 C7 CTSB AC002454.1
38 LINC02273 METTL8 S100A4 STK17A DDX60 ITGAE ZBTB16 SH2D1B CCL4 H2AFX KNL1 DUSP6 JUNB PLK3 C11orf96 MFGE8 CSF3R CHI3L2
39 EIF4B RILPL2 STAM CCR5 SAT1 DUSP4 FAM241A MCTP2 PTGDR MCM3 CENPA GZMK TNF ZFP36L2 FILIP1L KANK2 CD14 SMIM3
40 TOB1 TNFRSF18 GLRX GPR174 PPM1K LYST PDE4D IFITM3 ITGB2 EZH2 CDK1 KLRG1 ERN1 EEF2 RARRES1 NR2F1 C1QC SSBP2
41 SESN3 SLA SPATS2L COTL1 PNPT1 TNFSF4 IL23R ZNF683 XBP1 TUBB4B HMGB3 JUNB JUND HNRNPA1 COL1A2 MFAP4 SGK1 UHRF1
42 NSA2 SMCO4 AC005224.3 CCL4L2 PARP9 NDFIP2 B3GALT2 ITGA1 CEP78 H2AFV CDCA8 RASGEF1B ATF3 VSIR NUPR1 COL1A1 SPI1 LRRC28
43 PASK BTLA MAF CD3E IFIH1 AKAP5 CERK IFITM2 ARL4C CDK1 PLK1 ANXA1 DEDD2 PASK TCEAL9 PBX1 FCGRT BCL11A
44 TNFRSF25 NAP1L4 PBXIP1 TUBA4A SP110 GOLIM4 TLE1 CCL5 BIN2 HIST1H1D AURKA MCL1 CD69 EIF3H MARCKSL1 GSN EGR1 SCAI
45 FAU FYB1 F5 PECAM1 OAS2 CD27 EEF1B2 ITGAX LITAF HNRNPAB TROAP CXCR4 CLK1 TXNIP SLC40A1 PDGFRB FCGR2A ATP6AP1L
46 EEF1D MIR155HG SLAMF1 ARAP2 C19orf66 SNAP47 CFH CD38 TRDC CENPE H2AFV IER5 IER5L LDLRAP1 CFH SERPINF1 PLAUR RUFY3
47 LDLRAP1 PTPN13 BTG3 ITGA1 STAT2 RGS1 PERP SAMD3 TXK TK1 KIF2C MYADM H3F3B CMTM8 APOE SFRP1 CPVL CD79A
48 CTSL SESN3 TRAC JAML LAG3 HLA-DPA1 PLAT SLC16A3 MYOM2 ZWINT MAD2L1 CD8A NR4A1 SCML1 GNG11 LHFPL6 MS4A7 GNA15
49 ITGA6 BIRC3 IL1R1 CD84 LAP3 LINC02446 KIF5C CAPN12 GZMM BIRC5 NUCKS1 PTGER4 CXCR4 LINC00402 CDKN1C PLAC9 ALDH2 HHIP-AS1
50 PFDN5 TP53INP1 DNPH1 AOAH APOL6 SAMSN1 PLCB1 CD247 CD300A PTTG1 RAD21 ZFP36L2 EIF4A2 RIPK2 NBL1 SERPING1 SERPINA1 GSTM3

2 Clusters

2.1 sizes

enframe(sort(table(seu_obj$cluster_label))) %>% 
  mutate(name = ordered(name, levels = rev(name))) %>% 
  ggplot() +
  geom_bar(aes(name, value), stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(y = c("#cells"), x = "cluster")

2.2 UMAP

alpha_lvl <- ifelse(nrow(plot_data) < 20000, 0.2, 0.1)
pt_size <- ifelse(nrow(plot_data) < 20000, 0.2, 0.05)

common_layers_disc <- list(  
  geom_point(size = pt_size, alpha = alpha_lvl),
  NoAxes(),
  guides(color = guide_legend(override.aes = list(size = 2, alpha = 1))),
  labs(color = "")
)

common_layers_cont <- list(  
  geom_point(size = pt_size, alpha = alpha_lvl),
  NoAxes(),
  scale_color_gradientn(colors = viridis(9)),
  guides(color = guide_colorbar())
)

ggplot(plot_data, aes(umapharmony_1, umapharmony_2, color = cluster_label)) + 
  common_layers_disc +
  #facet_wrap(~therapy) +
  ggtitle("Sub cluster")

3 Filtering out doublet clusters

3.1 UMAP

my_subtypes <- names(clrs$cluster_label[[coi]])
my_subtypes <- c(my_subtypes, unlist(lapply(paste0("_", 1:3), function(x) paste0(my_subtypes, x)))) %>% .[!str_detect(., "doublet|dissociated")]
my_subtypes <- my_subtypes[my_subtypes %in% unique(seu_obj$cluster_label)]
my_subtypes <- my_subtypes[my_subtypes %in% names(clrs$cluster_label[[coi]])]

cells_to_keep <- colnames(seu_obj)[seu_obj$cluster_label %in% my_subtypes]
# seu_obj_sub <- subset(seu_obj, cells = cells_to_keep)
# seu_obj_sub <- RunUMAP(seu_obj_sub, dims = 1:50, reduction = "harmony", reduction.name = "umapharmony", reduction.key = "umapharmony_")
# seu_obj_sub$cluster_label <- seu_obj$cluster_label[colnames(seu_obj) %in% colnames(seu_obj_sub)]
# write_rds(seu_obj_sub, paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_processed_filtered.rds"))
seu_obj_sub <- read_rds(paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_processed_filtered.rds"))

plot_data_sub <- as_tibble(FetchData(seu_obj_sub, c(myfeatures, "cluster_label"))) %>% 
  left_join(meta_tbl, by = "sample") %>% 
  mutate(patient_id = str_remove_all(patient_id, "SPECTRUM-OV-"),
         tumor_supersite = ordered(tumor_supersite, levels = names(clrs$tumor_supersite))) %>% 
  mutate(cell_id = colnames(seu_obj_sub),
         cluster_label = ordered(cluster_label, levels = my_subtypes),
         )
  
if (cell_sort == "CD45+") {
plot_data_sub <- filter(plot_data_sub, sort_parameters != "singlet, live, CD45-", !is.na(tumor_supersite))
}

if (cell_sort == "CD45-") {
plot_data_sub <- filter(plot_data_sub, sort_parameters != "singlet, live, CD45+", !is.na(tumor_supersite))
}

ggplot(plot_data_sub, aes(umapharmony_1, umapharmony_2, color = cluster_label)) + 
  common_layers_disc +
  ggtitle("Sub cluster") +
  #facet_wrap(~cluster_label) +
  scale_color_manual(values = clrs$cluster_label[[coi]])

ggplot(plot_data_sub, aes(umapharmony_1, umapharmony_2, color = patient_id_short)) + 
  common_layers_disc +
  ggtitle("Patient") +
  # facet_wrap(~therapy) +
  scale_color_manual(values = clrs$patient_id_short)

ggplot(plot_data_sub, aes(umapharmony_1, umapharmony_2, color = tumor_supersite)) + 
  # geom_point(aes(umapharmony_1, umapharmony_2), 
  #            color = "grey90", size = 0.01, 
  #            data = select(plot_data_sub, -tumor_supersite)) +
  common_layers_disc +
  ggtitle("Site") +
  # facet_wrap(~therapy) +
  scale_color_manual(values = clrs$tumor_supersite)

write_tsv(select(plot_data_sub, cell_id, everything(), -umapharmony_1, -umapharmony_2, -contains("RNA_")), paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_embedding.tsv"))

4 subcluster CD8.T.ISG cluster

big_clusters <- list(
  CD8.T.ISG = c("CD8.T.ISG")
)

cells_to_keep_list <- lapply(big_clusters, function(x) colnames(seu_obj_sub)[seu_obj_sub$cluster_label %in% x])

# seu_list <- lapply(cells_to_keep_list, function(x) subset(seu_obj_sub, cells = x))
# 
# preprocess_wrapper <- . %>%
#   FindNeighbors(reduction = "harmony", dims = 1:50) %>%
#   FindClusters(res = 0.1)
# 
# seu_list <- lapply(seu_list, preprocess_wrapper)
# write_rds(seu_list, paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_processed_filtered_sub_clusters.rds"))

seu_list <- read_rds(paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_processed_filtered_sub_clusters.rds"))

cluster_vector_list <- list()

for (i in names(seu_list)) {
  cluster_vector_list[[i]] <- cbind(cell_id = colnames(seu_list[[i]]), FetchData(seu_list[[i]], c("RNA_snn_res.0.1"))) %>%
    as_tibble %>%
    mutate(RNA_snn_res.0.1 = paste0(i, "_", RNA_snn_res.0.1)) %>%
    deframe
}

cluster_vector <- unlist(cluster_vector_list, use.names = F) %>% setNames(lapply(cluster_vector_list, names) %>% unlist(use.names = F))

seu_obj$cluster_extended <- cluster_vector[seu_obj$cell_id] %>%
  setNames(seu_obj$cell_id)

seu_obj_sub$cluster_extended <- cluster_vector[seu_obj_sub$cell_id] %>%
  setNames(seu_obj_sub$cell_id)

cluster_extended_uniq <- as.character(na.omit(unique(seu_obj_sub$cluster_extended)))

Idents(seu_obj_sub) <- seu_obj_sub$cluster_extended
Idents(seu_obj) <- seu_obj$cluster_extended

# marker_tbl_extended <- lapply(
#   cluster_extended_uniq,
#   function(x) FindMarkers(seu_obj_sub, ident.1 = x)
# ) %>%
#   setNames(cluster_extended_uniq) %>%
#   bind_rows(.id = "cluster_extended") %>%
#   as_tibble(rownames = "gene") %>%
#   separate(gene, into = c("gene", "drop"), sep = "\\.\\.\\.") %>%
#   select(-drop)
# 
# write_tsv(marker_tbl_extended, paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_marker_table_annotated_extended.tsv"))

marker_tbl_extended <- read_tsv(paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_marker_table_annotated_extended.tsv"))
 
cluster_label_extended <- c(
  CD8.T.ISG_0 = "CD8.T.ISG",
  CD8.T.ISG_1 = "CD4.T.ISG",
  CD8.T.ISG_2 = "dissociated_3"
)

names(seu_obj$cluster_label) <- colnames(seu_obj)
seu_obj$cluster_label[is.na(seu_obj$cluster_label)] <- "NA"
Idents(seu_obj) <- seu_obj$cluster_label
seu_obj$cluster_label[seu_obj$cluster_label == "CD8.T.ISG"] <- cluster_label_extended[seu_obj$cluster_extended][seu_obj$cluster_label == "CD8.T.ISG"]

Idents(seu_obj_sub) <- seu_obj_sub$cluster_label
seu_obj_sub$cluster_label[seu_obj_sub$cluster_label == "CD8.T.ISG"] <- cluster_label_extended[seu_obj_sub$cluster_extended][seu_obj_sub$cluster_label == "CD8.T.ISG"]

marker_sheet_extended <- marker_tbl_extended %>%
  mutate(cluster_label = cluster_label_extended[cluster_extended]) %>% 
  group_by(cluster_label) %>%
  mutate(rank = row_number()) %>% 
  slice(1:50) %>% 
  select(rank, gene, cluster_label) %>% 
  spread(cluster_label, gene)

marker_sheet_joined <- marker_sheet %>% 
  select(-CD8.T.ISG) %>% 
  left_join(marker_sheet_extended, by = "rank") %>%
  gather(cluster_label, gene, -rank) %>% 
  mutate(cluster_label = ordered(cluster_label, levels = unique(c(names(clrs$cluster_label[[coi]]), sort(cluster_label))))) %>% 
  spread(cluster_label, gene)

cluster_n_tbl_full <- seu_obj$cluster_label %>% 
  table() %>% 
  enframe("cluster_label", "cluster_n") %>% 
  mutate(cluster_nrel = cluster_n/sum(cluster_n)) %>% 
  filter(cluster_label != "NA")

marker_tbl_extended_annotated <- marker_tbl_extended %>% 
  mutate(cluster_label = cluster_label_extended[cluster_extended]) %>%
  select(-cluster_extended) %>% 
  left_join(cluster_n_tbl_full, by = "cluster_label")

marker_tbl_annotated_full <- marker_tbl_annotated %>% 
  filter(!(cluster_label %in% unlist(big_clusters))) %>% 
  bind_rows(marker_tbl_extended_annotated) %>% 
  mutate(cluster_label = ordered(cluster_label, levels = unique(c(names(clrs$cluster_label[[coi]]), sort(cluster_label))))) %>% 
  arrange(cluster_label, -avg_logFC)

# Idents(seu_obj_sub) <- seu_obj_sub$cluster_label
# 
# cells_to_keep <- colnames(seu_obj_sub)[!(str_detect(seu_obj_sub$cluster_label, "dissociated|doublet"))]
# seu_obj_sub_sub <- subset(seu_obj_sub, cells = cells_to_keep)
# seu_obj_sub_sub <- RunUMAP(seu_obj_sub_sub, dims = 1:50, reduction = "harmony", reduction.name = "umapharmony", reduction.key = "umapharmony_")
# write_rds(seu_obj_sub_sub, paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_processed_filtered_sub.rds"))

seu_obj_sub_sub <- read_rds(paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_processed_filtered_sub.rds"))

formattable::formattable(marker_sheet_joined)
rank CD4.T.naive CD4.T.ISG CD4.T.CXCL13 CD4.T.reg CD8.T.cytotoxic CD8.T.ISG CD8.T.CXCL13 NK.naive NK.CD56 NK.cytotoxic Cycling.T.NK Cycling.T.NK_1 dissociated dissociated_1 dissociated_2 dissociated_3 doublet.Fibroblast doublet.Fibroblast_1 doublet.Monocyte doublet.Plasma.cell
1 IL7R IFIT3 CXCL13 TNFRSF4 GZMK IFIT3 CXCL13 KLRB1 GNLY FGFBP2 STMN1 CENPF CCL4L2 HSPA1A KLF2 XIST DCN IGFBP5 HLA-DRA SOX4
2 CCR7 ISG15 NMB IL2RA CD8A ISG15 GZMB IL4I1 TYROBP FCGR3A TUBA1B ASPM CCL4 HSPA1B CCR7 MALAT1 IGFBP5 TAGLN CST3 PTCRA
3 KLF2 IFIT2 NR3C1 FOXP3 CD8B IFIT1 CCL4L2 IL7R AREG SPON2 MKI67 MKI67 IFNG MT1X JUNB NEAT1 RBP1 ADIRF CXCL8 MAL
4 EEF1B2 MX2 MAF CTLA4 ITM2C MX1 MIR155HG LTB KLRC1 PRF1 HIST1H4C HMGB2 FOS DNAJB1 FOS MT-ND3 C7 DCN SPP1 MZB1
5 TPT1 IFI44L FKBP5 LTB GZMH IFIT2 TNFRSF9 LST1 FCER1G KLRF1 TUBB TOP2A FOSB MT1E DUSP1 MT-ND1 MEG3 IGFBP7 S100A9 DNTT
6 EEF1A1 RSAD2 IL6ST RTKN2 CCL5 RSAD2 HAVCR2 TNFSF13B TRDC GNLY TOP2A UBE2C TNF HSPA6 SELL MT-CO3 CALD1 SPARCL1 S100A8 TFDP2
7 MAL IFIT1 ITM2A BATF TRGC2 IFI6 RBPJ CCR6 XCL1 KLRD1 TYMS CCNB1 JUN HSP90AA1 IL7R MT-ND2 IGFBP4 CALD1 HLA-DQA1 CD1E
8 TCF7 MX1 TSHZ2 TNFRSF18 KLRG1 ISG20 LAG3 CTSH KLRD1 NKG7 CENPF UBE2S EGR1 HSPH1 AREG MT-CYB RARRES2 MGP CD74 STMN1
9 CD40LG IL7R CTLA4 SAT1 GZMA MX2 IFNG AQP3 KRT81 CX3CR1 HMGB2 PTTG1 CCL3L1 HSPE1 EEF1A1 MT-ATP6 NR2F2 C11orf96 LYZ ARPP21
10 SELL IFI6 CD40LG TBC1D4 CCL4 SAMD9L CCL3 CCL20 XCL2 GZMB ASPM TPX2 NFKBID HSPD1 CD69 MT-CO1 SELENOP MYL9 FTL AC084033.3
11 GPR183 TNFSF10 PDCD1 TIGIT CRTAM IFI44L PTMS NFKBIA IGFBP2 PLAC8 NUSAP1 STMN1 CD69 HSPB1 BTG2 PTPRC MDK TIMP3 APOE CDK6
12 LDHB STAT1 CD4 GADD45A CST7 HERC5 CD8A RORA CLIC3 CLIC3 PCLAF KPNA2 NR4A2 HSP90AB1 GPR183 MT-CO2 SOX4 MDK HLA-DQB1 MAP1A
13 NOSIP HERC5 LIMS1 TNFRSF1B GZMM OAS1 CRTAM TNFRSF25 IL2RB PLEK HMGN2 CENPE AC020916.1 DNAJA1 PIK3IP1 H3F3B ADIRF TPM2 MARCKS AC011893.1
14 SNHG8 ISG20 TNFRSF4 PMAIP1 HLA-DPB1 MT2A PHLDA1 CEBPD CEBPD TYROBP H2AFZ CKS2 EGR2 JUN CD55 CCNI MGP NR2F2 AIF1 GLUL
15 PABPC1 OAS1 RNF19A UGP2 DTHD1 STAT1 FABP5 TNFAIP3 KRT86 PTGDS PCNA TUBA1B DUSP1 CACYBP DNAJB1 GMFG EGR1 FBLN1 C1QB ADA
16 NOP53 OAS3 CORO1B IKZF2 PPP1R14B TNFSF10 TIGIT NCR3 TXK EFHD2 HIST1H1B HMMR TNFSF9 HSPA8 FKBP5 COX5B STAR IGF1 FTH1 AL357060.1
17 LEF1 EIF2AK2 RBPJ TNFRSF9 CD3G OASL KRT86 SLC4A10 CTSW FCER1G DUT DLGAP5 KLF6 FKBP4 TSC22D3 CALM2 SFRP4 RAMP1 BASP1 GRASP
18 EIF3E IFI44 CPM ICOS THEMIS PLSCR1 JAML TMIGD2 KLRB1 CST7 CLSPN CCNB2 TNFAIP3 CHORDC1 RACK1 PSMB1 CLU SELENOP C1QA CD1B
19 LTB GBP1 ZBED2 LINC01943 DUSP2 GBP1 CCL5 DPP4 MATK GZMH SMC4 TUBB4B DUSP2 RGS2 LDHB TPM3 ADAMTS1 RARRES2 C15orf48 MIR181A1HG
20 RACK1 CCR7 AC004585.1 IL32 CD3D EIF2AK2 LINC01871 TPT1 CCL3 ADGRG1 ATAD2 NUSAP1 BTG2 FOS SARAF AES TIMP2 LUM FN1 CCDC26
21 NACA EPSTI1 TOX2 SOX4 LYAR LAG3 CXCR6 MYBL1 CD7 CCL3 SMC2 BIRC5 IER2 ANXA1 CXCR4 ATP5MPL TCEAL4 IGFBP6 MNDA JCHAIN
22 UBA52 MT2A DUSP4 ARID5B TC2N SAMD9 TNIP3 S100A4 NKG7 HOPX TPX2 HMGN2 PPP1R15A DNAJB4 PLAC8 GTF3A C1R CARMN G0S2 VIPR2
23 TOMM7 SAMD9L AHI1 CD27 EOMES IFI35 PDCD1 CD40LG CD63 IGFBP7 TMPO ARL6IP1 NR4A1 PPP1R15A EEF1B2 SAP18 WFDC2 IGFBP4 APOC1 ID1
24 SOCS3 XAF1 ICA1 BIRC3 CXCR6 EPSTI1 HLA-DRB1 SPOCK2 HOPX ZEB2 HELLS CDC20 NFKBIA ZFAND2A TCF7 GNG5 FHL2 DST SOD2 SOCS2
25 JUNB TMEM123 ARID5B LAYN HLA-DPA1 OAS3 GAPDH LINC01871 TMIGD2 PRSS23 UBE2C CDKN3 JUND FOSB TPT1 COPE IFITM3 CAV1 NPC2 RCAN1
26 SERINC5 DDX58 CD84 CORO1B HLA-DRB1 XAF1 FAM3C ERN1 CMC1 AKR1C3 NASP SMC4 GADD45B DNAJA4 FOSB TRMT112 PEG3 ID4 LST1 CLDN5
27 TMEM123 CMPK2 CCDC50 TYMP SLF1 USP18 CTLA4 JAML TNFRSF18 CD247 DEK CKS1B TSC22D3 DUSP1 NOSIP CIB1 C1S NUPR1 GSN CASC15
28 EEF2 CD40LG IGFL2 DUSP4 APOBEC3G LY6E SPRY1 FKBP11 KLRC2 MYBL1 RRM2 KIF20B TAGAP TSPYL2 SC5D RBX1 LUM CSRP2 MS4A6A PFKFB2
29 FXYD5 SAMD9 RGS1 CD4 PPP2R5C CMPK2 CCND2 IFNGR1 SRGAP3 AREG CKS1B GTSE1 ZFP36 SERPINH1 NACA BRK1 SERPINF1 CDKN1C IL1B GALNT2
30 TSHZ2 GPR183 BATF ENTPD1 KIAA1551 IFI44 CD63 MGAT4A GSTP1 C1orf21 KNL1 TUBA1C ZFP36L1 UBC AP3M2 HSPA8 CEBPD PGR PSAP HES4
31 TRABD2A USP18 SRGN CTSC F2R NT5C3A CD8B S100A6 LAT2 S1PR5 HMGB1 TUBB RGCC UBB BTG1 VAMP8 AKAP12 COL6A1 GRN MARCKSL1
32 ANK3 LY6E CH25H MIR4435-2HG SLAMF7 HELZ2 GZMH ELK3 GZMB CTSW MCM7 SGO2 IL7R AHSA1 NOP53 PSMB9 TIMP1 COL6A2 CD83 TP53INP1
33 SARAF DDX60L SPOCK2 LINC02099 CXCR4 IRF7 ENTPD1 EEF1A1 LINC00996 ABHD17A FABP5 H2AFZ CRTAM NEU1 ZBTB16 S100A11 CST3 EMX2 CTSH APBA2
34 AQP3 TRIM22 ZNRF1 MAGEH1 SH2D1A CD38 SRGAP3 KIT PRF1 KLF2 UBE2S JPT1 KDM6B DNAJB6 ERAP2 POMP NR2F1 PALLD GLUL TSHR
35 RIPOR2 HELZ2 CHN1 SPOCK2 FAM102A TRIM22 VCAM1 LTC4S KLRF1 PTPN12 GAPDH CEP55 NR4A3 GADD45B CCND3 EDF1 DLK1 PPP1R14A MEF2C NREP
36 AP3M2 ANXA1 TNFRSF25 PHACTR2 CD52 DDX58 ID2 RUNX2 NCAM1 TTC38 CXCL13 NUF2 ATF3 KLF6 EEF1D NDUFS5 SERPING1 RBP1 CYBB MME
37 ZFAS1 NT5C3A CD200 CARD16 YBX3 PARP14 HLA-DRA RORC CXXC5 KLRB1 RANBP1 KIF14 DDX3X BTG2 PPP1R15A UQCR11 BEX3 C7 CTSB AC002454.1
38 LINC02273 IFIH1 METTL8 S100A4 STK17A CD8A ITGAE ZBTB16 SH2D1B CCL4 H2AFX KNL1 DUSP6 JUNB PLK3 HNRNPA1 C11orf96 MFGE8 CSF3R CHI3L2
39 EIF4B PLSCR1 RILPL2 STAM CCR5 DDX60 DUSP4 FAM241A MCTP2 PTGDR MCM3 CENPA GZMK TNF ZFP36L2 LDHB FILIP1L KANK2 CD14 SMIM3
40 TOB1 IFI35 TNFRSF18 GLRX GPR174 PPM1K LYST PDE4D IFITM3 ITGB2 EZH2 CDK1 KLRG1 ERN1 EEF2 ABRACL RARRES1 NR2F1 C1QC SSBP2
41 SESN3 IRF7 SLA SPATS2L COTL1 RNF213 TNFSF4 IL23R ZNF683 XBP1 TUBB4B HMGB3 JUNB JUND HNRNPA1 ATP5F1D COL1A2 MFAP4 SGK1 UHRF1
42 NSA2 DDX60 SMCO4 AC005224.3 CCL4L2 IFIH1 NDFIP2 B3GALT2 ITGA1 CEP78 H2AFV CDCA8 RASGEF1B ATF3 VSIR ANP32B NUPR1 COL1A1 SPI1 LRRC28
43 PASK PNPT1 BTLA MAF CD3E DDX60L AKAP5 CERK IFITM2 ARL4C CDK1 PLK1 ANXA1 DEDD2 PASK TRAPPC1 TCEAL9 PBX1 FCGRT BCL11A
44 TNFRSF25 OAS2 NAP1L4 PBXIP1 TUBA4A OAS2 GOLIM4 TLE1 CCL5 BIN2 HIST1H1D AURKA MCL1 CD69 EIF3H SNRPB MARCKSL1 GSN EGR1 SCAI
45 FAU PPM1K FYB1 F5 PECAM1 PARP9 CD27 EEF1B2 ITGAX LITAF HNRNPAB TROAP CXCR4 CLK1 TXNIP SEPT7 SLC40A1 PDGFRB FCGR2A ATP6AP1L
46 EEF1D PARP9 MIR155HG SLAMF1 ARAP2 PNPT1 SNAP47 CFH CD38 TRDC CENPE H2AFV IER5 IER5L LDLRAP1 ZFAS1 CFH SERPINF1 PLAUR RUFY3
47 LDLRAP1 LGALS3BP PTPN13 BTG3 ITGA1 GZMK RGS1 PERP SAMD3 TXK TK1 KIF2C MYADM H3F3B CMTM8 SEC61G APOE SFRP1 CPVL CD79A
48 CTSL LTB SESN3 TRAC JAML SP110 HLA-DPA1 PLAT SLC16A3 MYOM2 ZWINT MAD2L1 CD8A NR4A1 SCML1 UQCRH GNG11 LHFPL6 MS4A7 GNA15
49 ITGA6 PGAP1 BIRC3 IL1R1 CD84 BST2 LINC02446 KIF5C CAPN12 GZMM BIRC5 NUCKS1 PTGER4 CXCR4 LINC00402 PPP1CA CDKN1C PLAC9 ALDH2 HHIP-AS1
50 PFDN5 LAMP3 TP53INP1 DNPH1 AOAH C19orf66 SAMSN1 PLCB1 CD247 CD300A PTTG1 RAD21 ZFP36L2 EIF4A2 RIPK2 SUB1 NBL1 SERPING1 SERPINA1 GSTM3
write_tsv(marker_sheet_joined, paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_marker_sheet_full.tsv"))

write_tsv(marker_sheet_joined, paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/supplementary_tables/", coi, "_marker_sheet_full.tsv"))

write_tsv(marker_tbl_annotated_full, paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_marker_table_annotated_full.tsv"))

write_tsv(marker_tbl_annotated_full, paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/supplementary_tables/", coi, "_marker_table_annotated_full.tsv"))
plot_data_sub_sub <- as_tibble(FetchData(seu_obj_sub_sub, c(myfeatures, "cluster_label"))) %>% 
  left_join(meta_tbl, by = "sample") %>% 
  mutate(patient_id = str_remove_all(patient_id, "SPECTRUM-OV-"),
         tumor_supersite = ordered(tumor_supersite, levels = names(clrs$tumor_supersite))) %>% 
  mutate(cell_id = colnames(seu_obj_sub_sub),
         cluster_label = ordered(cluster_label, levels = names(clrs$cluster_label[[coi]])),
         )
  
if (cell_sort == "CD45+") {
plot_data_sub_sub <- filter(plot_data_sub_sub, sort_parameters != "singlet, live, CD45-", !is.na(tumor_supersite))
}

if (cell_sort == "CD45-") {
plot_data_sub_sub <- filter(plot_data_sub_sub, sort_parameters != "singlet, live, CD45+", !is.na(tumor_supersite))
}

ggplot(plot_data_sub_sub, aes(umapharmony_1, umapharmony_2, color = cluster_label)) + 
  common_layers_disc +
  ggtitle("Sub cluster") +
  #facet_wrap(~cluster_label) +
  scale_color_manual(values = clrs$cluster_label[[coi]])

4.1 add module scores and pathway scores

signature_modules <- read_excel("_data/small/signatures/SPECTRUM v5 sub cluster markers.xlsx", sheet = 2, skip = 1, range = "M2:P100") %>% 
  gather(module, gene) %>% 
  na.omit() %>% 
  group_by(module) %>% 
  do(gene = c(.$gene)) %>% 
  {setNames(.$gene, .$module)}

signature_modules$ISG.module <- c("CCL5", "CXCL10", "IFNA1", "IFNB1", "ISG15", "IFI27L2", "SAMD9L")

## compute expression module scores
for (i in 1:length(signature_modules)) {
  seu_obj_sub_sub <- AddModuleScore(seu_obj_sub_sub, features = signature_modules[i], name = names(signature_modules)[i])
  seu_obj_sub_sub[[names(signature_modules)[i]]] <- seu_obj_sub_sub[[paste0(names(signature_modules)[i], "1")]]
  seu_obj_sub_sub[[paste0(names(signature_modules)[i], "1")]] <- NULL
  print(paste(names(signature_modules)[i], "DONE"))
}
## [1] "CD8.Cytotoxic DONE"
## [1] "CD8.Dysfunctional DONE"
## [1] "CD8.Naive DONE"
## [1] "CD8.Predysfunctional DONE"
## [1] "ISG.module DONE"
## compute progeny scores
progeny_list <- seu_obj_sub_sub@assays$RNA@data[VariableFeatures(seu_obj_sub_sub),] %>% 
  as.matrix %>% 
  progeny %>% 
  as.data.frame %>% 
  as.list

names(progeny_list) <- make.names(paste0(names(progeny_list), ".pathway"))

for (i in 1:length(progeny_list)) {
  seu_obj_sub_sub <- AddMetaData(seu_obj_sub_sub, 
                                 metadata = progeny_list[[i]],
                                 col.name = names(progeny_list)[i])
}

write_rds(seu_obj_sub_sub, paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_processed_filtered_annotated.rds"))

seu_obj_sub_sub <- read_rds(paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_processed_filtered_annotated.rds"))

4.2 marker heatmap

marker_top_tbl <- marker_sheet_joined[,-1] %>% 
  slice(1:10) %>% 
  as.list %>% 
  .[!str_detect(names(.), "doublet|dissociated")] %>% 
  enframe("cluster_label_x", "gene") %>% 
  unnest(gene)

plot_data_markers <- as_tibble(FetchData(seu_obj_sub_sub, c("cluster_label", myfeatures, unique(marker_top_tbl$gene)))) %>% 
  gather(gene, value, -c(1:(length(myfeatures)+1))) %>% 
  left_join(meta_tbl, by = "sample") %>% 
  mutate(patient_id = str_remove_all(patient_id, "SPECTRUM-OV-"),
         tumor_supersite = ordered(tumor_supersite, levels = names(clrs$tumor_supersite))) %>% 
  mutate(cluster_label = ordered(cluster_label, levels = my_subtypes)) %>% 
  group_by(cluster_label, gene) %>% 
  summarise(value = mean(value, na.rm = T)) %>% 
  group_by(gene) %>% 
  mutate(value = scales::rescale(value)) %>% 
  left_join(marker_top_tbl, by = "gene") %>% 
  mutate(cluster_label_x = ordered(cluster_label_x, levels = rev(names(clrs$cluster_label[[coi]]))))

ggplot(plot_data_markers) +
  geom_tile(aes(gene, cluster_label, fill = value)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  facet_grid(~cluster_label_x, scales = "free", space = "free") +
  scale_fill_gradientn(colors = viridis(9)) +
  labs(fill = "Scaled\nexpression") +
  theme(aspect.ratio = 1,
        axis.line = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank())

# ggsave(paste0("_fig/002_marker_heatmap_", coi, ".pdf"), width = nrow(marker_top_tbl)/6, height = 5)

4.3 composition

4.3.1 per site

comp_site_tbl <- plot_data_sub_sub %>%
  filter(!is.na(tumor_supersite)) %>% 
  group_by(cluster_label, tumor_supersite) %>%
  tally %>%
  group_by(tumor_supersite) %>%
  mutate(nrel = n/sum(n)*100) %>%
  ungroup

pnrel_site <- ggplot(comp_site_tbl) +
  geom_bar(aes(tumor_supersite, nrel, fill = cluster_label),
           stat = "identity", position = position_stack()) +
  scale_y_continuous(expand = c(0, 0)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(fill = "Cluster", y = "Fraction [%]", x = "") +
  scale_fill_manual(values = clrs$cluster_label[[coi]])

pnabs_site <- ggplot(comp_site_tbl) +
  geom_bar(aes(tumor_supersite, n, fill = cluster_label),
           stat = "identity", position = position_stack()) +
  scale_y_continuous(expand = c(0, 0)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(fill = "Cluster", y = "# cells", x = "") +
  scale_fill_manual(values = clrs$cluster_label[[coi]])

plot_grid(pnabs_site, pnrel_site, ncol = 2, align = "h")

# ggsave(paste0("_fig/02_deep_dive_", coi, "_comp_site.pdf"), width = 8, height = 4)

4.3.2 per sample

comp_tbl_sample_sort <- plot_data_sub_sub %>% 
  group_by(tumor_subsite, tumor_supersite, patient_id, consensus_signature, therapy, cluster_label) %>% 
  tally %>% 
  group_by(tumor_subsite, tumor_supersite, patient_id, consensus_signature, therapy) %>% 
  mutate(nrel = n/sum(n)*100,
         nsum = sum(n),
         log10n = log10(n),
         label_supersite = "Site",
         label_mutsig = "Signature",
         label_therapy = "Rx") %>% 
  ungroup %>% 
  arrange(desc(therapy), tumor_supersite) %>% 
  mutate(tumor_subsite_rx = paste0(tumor_subsite, "_", therapy)) %>% 
  mutate(tumor_subsite = ordered(tumor_subsite, levels = unique(tumor_subsite)),
         tumor_subsite_rx = ordered(tumor_subsite_rx, levels = unique(tumor_subsite_rx))) %>% 
  arrange(patient_id) %>% 
  mutate(label_patient_id = ifelse(as.logical(as.numeric(fct_inorder(as.character(patient_id)))%%2), "Patient1", "Patient2"))

sample_id_x_tbl <- plot_data_sub %>% 
  mutate(sort_short_x = cell_sort) %>% 
  distinct(patient_id, sort_short_x, tumor_subsite, therapy, sample) %>% 
  unite(sample_id_x, patient_id, sort_short_x, tumor_subsite, therapy) %>% 
  arrange(sample_id_x)

comp_tbl_sample_sort %>% 
  mutate(sort_short_x = cell_sort) %>% 
  unite(sample_id_x, patient_id, sort_short_x, tumor_subsite_rx) %>% 
  select(sample_id_x, cluster_label, n, nrel, nsum) %>% 
  left_join(sample_id_x_tbl, by = "sample_id_x") %>% 
  write_tsv(paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_subtype_compositions.tsv"))
ybreaks <- c(500, 1000, 2000, 4000, 6000, 8000, 10000, 15000, 20000)

max_cells_per_sample <- max(comp_tbl_sample_sort$nsum)
ymaxn <- ybreaks[max_cells_per_sample < ybreaks][1]

comp_plot_wrapper <- function(y = "nrel", switch = NULL) {
  if (y == "nrel") ylab <- paste0("Fraction\nof cells [%]")
  if (y == "n") ylab <- paste0("Number\nof cells")
  p <- ggplot(comp_tbl_sample_sort, 
              aes_string("tumor_subsite_rx", y, fill = "cluster_label")) + 
    facet_grid(~patient_id, space = "free", scales = "free", switch = switch) +
    coord_cartesian(clip = "off") + 
    scale_fill_manual(values = clrs$cluster_label[[coi]]) + 
    theme(axis.text.x = element_blank(),
          axis.title.y = element_text(angle = 0, vjust = 0.5, hjust = 0.5, 
                                      margin = margin(0, -0.4, 0, 0, unit = "npc")),
          axis.ticks.x = element_blank(),
          axis.title.x = element_blank(),
          axis.line.x = element_blank(),
          strip.text.y = element_blank(),
          strip.text.x = element_blank(),
          strip.background.y = element_blank(),
          strip.background.x = element_blank(),
          plot.margin = margin(0, 0, 0, 0, "npc")) + 
    labs(x = "",
         y = ylab) +
    guides(fill = FALSE)
  if (y == "nrel") p <- p + 
    geom_bar(stat = "identity") +
    scale_y_continuous(expand = c(0, 0), 
                       breaks = c(0, 50, 100), 
                       labels = c("0", "50", "100"))
  if (y == "n") p <- p + 
    geom_bar(stat = "identity", position = position_stack()) +
    scale_y_continuous(expand = c(0, 0), 
                       breaks = c(0, ymaxn/2, ymaxn),
                       limits = c(0, ymaxn),
                       labels = c("", ymaxn/2, ymaxn)) +
    expand_limits(y = c(0, ymaxn)) +
    theme(panel.grid.major.y = element_line(linetype = 1, color = "grey90", size = 0.5))
  return(p)
} 

common_label_layers <- list(
  geom_tile(color = "white", size = 0.15),
  theme(axis.text.x = element_blank(),
        axis.ticks = element_blank(),
        axis.title.x = element_blank(),
        axis.line.x = element_blank(),
        strip.text = element_blank(),
        strip.background = element_blank(),
        plot.margin = margin(0, 0, 0, 0, "npc")),
  scale_y_discrete(expand = c(0, 0)),
  labs(y = ""),
  guides(fill = FALSE),
  facet_grid(~patient_id, 
             space = "free", scales = "free")
)

comp_label_site <- ggplot(distinct(comp_tbl_sample_sort, tumor_subsite_rx, therapy, tumor_supersite, label_supersite, patient_id), 
       aes(tumor_subsite_rx, label_supersite, 
           fill = tumor_supersite)) + 
  scale_fill_manual(values = clrs$tumor_supersite) +
  common_label_layers

comp_label_rx <- ggplot(distinct(comp_tbl_sample_sort, tumor_subsite_rx, therapy, tumor_supersite, label_therapy, consensus_signature, patient_id), 
       aes(tumor_subsite_rx, label_therapy, 
           fill = therapy)) + 
  scale_fill_manual(values = c(`post-Rx` = "gold3", `pre-Rx` = "steelblue")) +
  common_label_layers

comp_label_mutsig <- ggplot(distinct(comp_tbl_sample_sort, tumor_subsite_rx, therapy, tumor_supersite, label_mutsig, consensus_signature, patient_id), 
       aes(tumor_subsite_rx, label_mutsig, 
           fill = consensus_signature)) + 
  scale_fill_manual(values = clrs$consensus_signature) +
  common_label_layers

patient_label_tbl <- distinct(comp_tbl_sample_sort, patient_id, .keep_all = T)

comp_label_patient_id <- ggplot(patient_label_tbl, aes(tumor_subsite_rx, label_patient_id)) + 
  scale_fill_manual(values = clrs$consensus_signature) +
  geom_text(aes(tumor_subsite_rx, label_patient_id, label = patient_id)) +
  facet_grid(~patient_id, 
             space = "free", scales = "free") +
  coord_cartesian(clip = "off") + 
  theme_void() +
  theme(strip.text = element_blank(),
        strip.background = element_blank(),
        plot.margin = margin(0, 0, 0, 0, "npc"))

hist_plot_wrapper <- function(x = "nrel") {
  if (x == "nrel") {
    xlab <- paste0("Fraction of cells [%]")
    bw <- 5
  }
  if (x == "log10n") {
    xlab <- paste0("Number of cells")
    bw <- 0.2
  }
  p <- ggplot(comp_tbl_sample_sort) +
    ggridges::geom_density_ridges(
      aes_string(x, "cluster_label", fill = "cluster_label"), color = "black",
      stat = "binline", binwidth = bw, scale = 3) +
    facet_grid(label_supersite~., 
               space = "free", scales = "free") +
    scale_fill_manual(values = clrs$cluster_label[[coi]]) + 
    theme(axis.text.y = element_blank(),
          axis.ticks.y = element_blank(),
          axis.title.y = element_blank(),
          axis.line.y = element_blank(),
          strip.text = element_blank(),
          strip.background = element_blank(),
          plot.margin = margin(0, 0, 0, 0, "npc")) +
    scale_x_continuous(expand = c(0, 0)) +
    scale_y_discrete(expand = c(0, 0)) +
    guides(fill = FALSE) +
    labs(x = xlab)
  if (x == "log10n") p <- p + expand_limits(x = c(0, 3)) + 
    scale_x_continuous(expand = c(0, 0), 
                       labels = function(x) parse(text = paste("10^", x)))
  return(p)
}

pcomp1 <- comp_plot_wrapper("n")
pcomp2 <- comp_plot_wrapper("nrel")

pcomp_grid <- plot_grid(comp_label_patient_id, 
                        pcomp1, pcomp2, 
                        comp_label_site, comp_label_rx, comp_label_mutsig,
                        ncol = 1, align = "v", 
                        rel_heights = c(0.15, 0.33, 0.33, 0.06, 0.06, 0.06))

phist1 <- hist_plot_wrapper("log10n")

pcomp_hist_grid <- ggdraw() +
  draw_plot(pcomp_grid, x = 0.01, y = 0, width = 0.85, height = 1) +
  draw_plot(phist1, x = 0.87, y = 0.05, width = 0.12, height = 0.8)

pcomp_hist_grid

# ggsave(paste0("_fig/02_composition_v6_",coi,".pdf"), pcomp_hist_grid, width = 10, height = 2)

4.3.3 site specific cluster enrichment

comp_tbl_z <- comp_tbl_sample_sort %>% 
  filter(therapy == "pre-Rx",
         !(tumor_supersite %in% c("Ascites", "Other"))) %>% 
  group_by(patient_id, cluster_label) %>% 
  arrange(patient_id, cluster_label, nrel) %>% 
  mutate(rank = row_number(nrel),
         z_rank = scales::rescale(rank)) %>% 
  mutate(mean_nrel = mean(nrel, na.rm = T),
         sd_nrel = sd(nrel, na.rm = T),
         z_nrel = (nrel - mean_nrel) / sd_nrel) %>% 
  ungroup()

ggplot(comp_tbl_z) +
  geom_boxplot(aes(tumor_supersite, z_nrel, color = tumor_supersite), 
               outlier.shape = NA) +
  geom_point(aes(tumor_supersite, z_nrel, color = tumor_supersite), position = position_jitter(), size = 0.1) +
  facet_grid(~cluster_label, scales = "free_x", space = "free_x") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        aspect.ratio = 5) +
  scale_color_manual(values = clrs$tumor_supersite)

ggplot(comp_tbl_z) +
  geom_boxplot(aes(tumor_supersite, z_rank, color = tumor_supersite), 
               outlier.shape = NA) +
  geom_point(aes(tumor_supersite, z_rank, color = tumor_supersite), position = position_jitter(), size = 0.1) +
  facet_grid(~cluster_label, scales = "free_x", space = "free_x") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        aspect.ratio = 5) +
  scale_color_manual(values = clrs$tumor_supersite)

## sub cluster CD8 cells
# seu_obj_cd8 <- seu_obj_sub_sub %>%
#   subset(subset = cluster_label %in% my_subtypes[str_detect(my_subtypes, "CD8.T")]) %>% 
#   RunUMAP(dims = 1:50, reduction = "harmony", reduction.name = "umapharmony", reduction.key = "umapharmony_")
# 
# write_rds(seu_obj_cd8, "/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/CD8.T_processed.rds")

seu_obj_cd8 <- read_rds("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/CD8.T_processed.rds")

# marker_tbl <- FindAllMarkers(seu_obj_cd8, only.pos = T)
# write_tsv(as_tibble(marker_tbl), "/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/CD8.T_markers.tsv")
# marker_tbl <- read_tsv("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/CD8.T_markers.tsv")
# 
# ## Hypergeometric test --------------------------------------
# 
# test_set <- marker_tbl %>% 
#   group_by(cluster) %>% 
#   filter(!str_detect(gene, "^RPS|^RPL")) %>% 
#   slice(1:50) %>% 
#   mutate(k = length(cluster)) %>% 
#   ungroup %>%
#   select(cluster, gene, k) %>% 
#   mutate(join_helper = 1) %>% 
#   group_by(cluster, join_helper, k) %>%
#   nest(test_set = gene)
# 
# markers_doub_tbl <- markers_v6 %>% 
#   enframe("subtype", "gene") %>% 
#   filter(!(subtype %in% unique(c(coi, cell_type_major)))) %>% 
#   unnest(gene) %>% 
#   group_by(gene) %>% 
#   filter(length(gene) == 1) %>% 
#   mutate(subtype = paste0("doublet.", subtype)) %>% 
#   bind_rows(tibble(subtype = "Mito.high", gene = grep("^MT-", rownames(seu_obj), value = T)))
# 
# ref_set <- markers_v6_super[["CD8.T"]] %>% 
#   bind_rows(markers_doub_tbl) %>% 
#   group_by(subtype) %>% 
#   mutate(m = length(gene),
#          n = length(rownames(seu_obj))-m,
#          join_helper = 1) %>% 
#   group_by(subtype, m, n, join_helper) %>%
#   nest(ref_set = gene)
# 
# hyper_tbl <- test_set %>% 
#   left_join(ref_set, by = "join_helper") %>% 
#   group_by(cluster, subtype, m, n, k) %>%
#   do(q = length(intersect(unlist(.$ref_set), unlist(.$test_set)))) %>%
#   mutate(pval = 1-phyper(q = q, m = m, n = n, k = k)) %>%
#   ungroup %>%
#   mutate(qval = p.adjust(pval, "BH"),
#          sig = qval < 0.01)
# 
# # hyper_tbl %>% 
# #   group_by(subtype) %>% 
# #   filter(any(qval < 0.01)) %>%
# #   ggplot(aes(subtype, -log10(qval), fill = sig)) +
# #   geom_bar(stat = "identity") +
# #   facet_wrap(~cluster) +
# #   coord_flip()
#   
# low_rank <- str_detect(unique(hyper_tbl$subtype), "Mito|doublet")
# subtype_lvl <- c(sort(unique(hyper_tbl$subtype)[!low_rank]), sort(unique(hyper_tbl$subtype)[low_rank]))
#   
# cluster_label_tbl <- hyper_tbl %>% 
#   mutate(subtype = ordered(subtype, levels = subtype_lvl)) %>% 
#   arrange(qval, subtype) %>%
#   group_by(cluster) %>% 
#   slice(1) %>% 
#   mutate(subtype = ifelse(sig, as.character(subtype), paste0("unknown_", cluster))) %>% 
#   select(cluster, cluster_label = subtype) %>% 
#   ungroup %>% 
#   mutate(cluster_label = make.unique(cluster_label, sep = "_"))
# 
# marker_sheet <- marker_tbl %>% 
#   left_join(cluster_label_tbl, by = "cluster") %>% 
#   group_by(cluster_label) %>% 
#   filter(!str_detect(gene, "^RPS|^RPL")) %>% 
#   slice(1:50) %>% 
#   mutate(rank = row_number()) %>% 
#   select(cluster_label, gene, rank) %>% 
#   spread(cluster_label, gene) %>% 
#   mutate_all(.funs = helper_f)
# 
# formattable::formattable(marker_sheet)
# write_tsv(marker_sheet, paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/CD8.T.cell_marker_sheet.tsv"))
# seu_obj_cd8$cluster_label <- unname(deframe(cluster_label_tbl)[as.character(unlist(seu_obj_cd8[[paste0("RNA_snn_res.", louvain_resolution)]]))])
# 
# seu_obj_sub_sub <- seu_obj_sub
# 
# cluster_label_tbl_1 <- as_tibble(cbind(cell_id=colnames(seu_obj_sub), FetchData(seu_obj_sub, c("cluster_label"))))
# cluster_label_tbl_2 <- as_tibble(cbind(cell_id=colnames(seu_obj_cd8), FetchData(seu_obj_cd8, c("cluster_label"))))
# 
# cluster_label_tbl <- left_join(cluster_label_tbl_1, cluster_label_tbl_2, by = "cell_id") %>%
#   mutate(cluster_label = ifelse(is.na(cluster_label.y), cluster_label.x, cluster_label.y))
# 
# seu_obj_sub_sub$cluster_label <- cluster_label_tbl$cluster_label
# 
# seu_obj_sub_sub <- subset(seu_obj_sub_sub, subset = cluster_label != "doublet.Plasma.cell")
# write_rds(seu_obj_sub_sub, "/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/T.cell_processed_filtered_sub.rds")
# root_cell <- "SPECTRUM-OV-036_S1_CD45P_PELVIC_PERITONEUM_CGGACACCAACGACTT"
# seu_obj_cd8_sub <- subset(seu_obj_cd8, subset = cluster_label != "doublet.Plasma.cell" & cluster_label != "gd.T.cell")
# seu_obj_cd8_sub <- subset(seu_obj_cd8_sub, cells = c(root_cell, colnames(seu_obj_cd8_sub)[colnames(seu_obj_cd8_sub)!=root_cell][-1]))
# 
# dc_obj <- DiffusionMap(seu_obj_cd8_sub@reductions$harmony@cell.embeddings, k = 100)
# dc_mat <- dc_obj@eigenvectors
# colnames(dc_mat) <- paste0("DC_", 1:ncol(dc_mat))
# seu_obj_cd8_sub[["DC"]] <- CreateDimReducObject(embeddings = dc_mat, key = "DC_", assay = DefaultAssay(seu_obj_cd8_sub))
# 
# write_rds(seu_obj_cd8_sub, "/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/CD8.T_processed_filtered.rds")
# 
# dpt_obj <- DPT(dc_obj)
# write_rds(dpt_obj, "/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/CD8.T_sub_dpt.rds")
# seu_obj_cd8_sub$DPT1 <- dpt_obj$DPT1
# 
# write_rds(seu_obj_cd8_sub, "/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/CD8.T_processed_filtered.rds")
seu_obj_cd8_sub <- read_rds("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/CD8.T_processed_filtered.rds")
# plot_data_sub_sub <- as_tibble(FetchData(seu_obj_sub_sub, c(myfeatures, "cluster_label"))) %>% 
#   left_join(meta_tbl, by = "sample") %>% 
#   mutate(patient_id = str_remove_all(patient_id, "SPECTRUM-OV-"),
#          tumor_supersite = ordered(tumor_supersite, levels = names(clrs$tumor_supersite))) %>% 
#   mutate(cell_id = colnames(seu_obj_sub_sub),
#          cluster_label = ordered(cluster_label, levels = my_subtypes),
#          )
#   
# if (cell_sort == "CD45+") {
# plot_data_sub_sub <- filter(plot_data_sub_sub, sort_parameters != "singlet, live, CD45-", !is.na(tumor_supersite))
# }
# 
# if (cell_sort == "CD45-") {
# plot_data_sub_sub <- filter(plot_data_sub_sub, sort_parameters != "singlet, live, CD45+", !is.na(tumor_supersite))
# }
# 
# ggplot(plot_data_sub_sub, aes(umapharmony_1, umapharmony_2, color = cluster_label)) + 
#   common_layers_disc +
#   ggtitle("Sub cluster") +
#   #facet_wrap(~cluster_label) +
#   scale_color_manual(values = clrs$cluster_label[[coi]])
# 
# ggplot(plot_data_sub_sub, aes(umapharmony_1, umapharmony_2, color = patient_id_short)) + 
#   common_layers_disc +
#   ggtitle("Patient") +
#   # facet_wrap(~therapy) +
#   scale_color_manual(values = clrs$patient_id_short)
# 
# ggplot(plot_data_sub_sub, aes(umapharmony_1, umapharmony_2, color = tumor_supersite)) + 
#   # geom_point(aes(umapharmony_1, umapharmony_2), 
#   #            color = "grey90", size = 0.01, 
#   #            data = select(plot_data_sub_sub, -tumor_supersite)) +
#   common_layers_disc +
#   ggtitle("Site") +
#   # facet_wrap(~therapy) +
#   scale_color_manual(values = clrs$tumor_supersite)
# 
# write_tsv(select(plot_data_sub_sub, cell_id, everything(), -umapharmony_1, -umapharmony_2, -contains("RNA_")), paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_embedding_sub.tsv"))

4.3.4 UMAP & DC

plot_data_cd8_sub <- as_tibble(FetchData(seu_obj_cd8_sub, c(myfeatures, "cluster_label", "DC_1", "DC_2"))) %>% 
  left_join(meta_tbl, by = "sample") %>% 
  mutate(patient_id = str_remove_all(patient_id, "SPECTRUM-OV-"),
         tumor_supersite = ordered(tumor_supersite, levels = names(clrs$tumor_supersite))) %>% 
  mutate(cell_id = colnames(seu_obj_cd8_sub),
         cluster_label = ordered(cluster_label, levels = my_subtypes),
         )
  
if (cell_sort == "CD45+") {
plot_data_cd8_sub <- filter(plot_data_cd8_sub, sort_parameters != "singlet, live, CD45-", !is.na(tumor_supersite))
}

if (cell_sort == "CD45-") {
plot_data_cd8_sub <- filter(plot_data_cd8_sub, sort_parameters != "singlet, live, CD45+", !is.na(tumor_supersite))
}

ggplot(plot_data_cd8_sub, aes(umapharmony_1, umapharmony_2, color = cluster_label)) + 
  common_layers_disc +
  ggtitle("Sub cluster") +
  #facet_wrap(~cluster_label) +
  scale_color_manual(values = clrs$cluster_label[[coi]])

ggplot(plot_data_cd8_sub, aes(umapharmony_1, umapharmony_2, color = patient_id_short)) + 
  common_layers_disc +
  ggtitle("Patient") +
  # facet_wrap(~therapy) +
  scale_color_manual(values = clrs$patient_id_short)

ggplot(plot_data_cd8_sub, aes(umapharmony_1, umapharmony_2, color = tumor_supersite)) + 
  common_layers_disc +
  ggtitle("Site") +
  # facet_wrap(~therapy) +
  scale_color_manual(values = clrs$tumor_supersite)

ggplot(plot_data_cd8_sub, aes(DC_1, DC_2, color = cluster_label)) + 
  common_layers_disc +
  ggtitle("Sub cluster") +
  #facet_wrap(~cluster_label) +
  scale_color_manual(values = clrs$cluster_label[[coi]])

ggplot(plot_data_cd8_sub, aes(DC_1, DC_2, color = patient_id_short)) + 
  common_layers_disc +
  ggtitle("Patient") +
  # facet_wrap(~therapy) +
  scale_color_manual(values = clrs$patient_id_short)

ggplot(plot_data_cd8_sub, aes(DC_1, DC_2, color = tumor_supersite)) + 
  common_layers_disc +
  ggtitle("Site") +
  # facet_wrap(~therapy) +
  scale_color_manual(values = clrs$tumor_supersite)

5 session info

devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value                       
##  version  R version 3.6.2 (2019-12-12)
##  os       Debian GNU/Linux 10 (buster)
##  system   x86_64, linux-gnu           
##  ui       X11                         
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       Etc/UTC                     
##  date     2021-01-21                  
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package              * version    date       lib
##  abind                  1.4-5      2016-07-21 [2]
##  ape                    5.3        2019-03-17 [2]
##  assertthat             0.2.1      2019-03-21 [2]
##  backports              1.1.10     2020-09-15 [1]
##  bibtex                 0.4.2.2    2020-01-02 [2]
##  Biobase                2.46.0     2019-10-29 [2]
##  BiocGenerics           0.32.0     2019-10-29 [2]
##  BiocParallel           1.20.1     2019-12-21 [2]
##  bitops                 1.0-6      2013-08-17 [2]
##  boot                   1.3-24     2019-12-20 [3]
##  broom                  0.7.2      2020-10-20 [1]
##  callr                  3.4.2      2020-02-12 [1]
##  car                    3.0-8      2020-05-21 [1]
##  carData                3.0-4      2020-05-22 [1]
##  caTools                1.17.1.4   2020-01-13 [2]
##  cellranger             1.1.0      2016-07-27 [2]
##  class                  7.3-15     2019-01-01 [3]
##  cli                    2.0.2      2020-02-28 [1]
##  cluster                2.1.0      2019-06-19 [3]
##  codetools              0.2-16     2018-12-24 [3]
##  colorblindr          * 0.1.0      2020-01-13 [2]
##  colorspace           * 1.4-2      2019-12-29 [2]
##  cowplot              * 1.0.0      2019-07-11 [2]
##  crayon                 1.3.4      2017-09-16 [1]
##  curl                   4.3        2019-12-02 [2]
##  data.table             1.12.8     2019-12-09 [2]
##  DBI                    1.1.0      2019-12-15 [2]
##  dbplyr                 2.0.0      2020-11-03 [1]
##  DelayedArray           0.12.2     2020-01-06 [2]
##  DEoptimR               1.0-8      2016-11-19 [1]
##  desc                   1.2.0      2018-05-01 [2]
##  destiny              * 3.0.1      2020-01-16 [1]
##  devtools               2.2.1      2019-09-24 [2]
##  digest                 0.6.25     2020-02-23 [1]
##  dplyr                * 1.0.2      2020-08-18 [1]
##  e1071                  1.7-3      2019-11-26 [1]
##  ellipsis               0.3.1      2020-05-15 [1]
##  evaluate               0.14       2019-05-28 [2]
##  fansi                  0.4.1      2020-01-08 [2]
##  farver                 2.0.3      2020-01-16 [1]
##  fitdistrplus           1.0-14     2019-01-23 [2]
##  forcats              * 0.5.0      2020-03-01 [1]
##  foreign                0.8-74     2019-12-26 [3]
##  formattable            0.2.0.1    2016-08-05 [1]
##  fs                     1.5.0      2020-07-31 [1]
##  future                 1.15.1     2019-11-25 [2]
##  future.apply           1.4.0      2020-01-07 [2]
##  gbRd                   0.4-11     2012-10-01 [2]
##  gdata                  2.18.0     2017-06-06 [2]
##  generics               0.0.2      2018-11-29 [2]
##  GenomeInfoDb           1.22.0     2019-10-29 [2]
##  GenomeInfoDbData       1.2.2      2020-01-14 [2]
##  GenomicRanges          1.38.0     2019-10-29 [2]
##  ggplot.multistats      1.0.0      2019-10-28 [1]
##  ggplot2              * 3.3.2      2020-06-19 [1]
##  ggrepel                0.8.1      2019-05-07 [2]
##  ggridges               0.5.2      2020-01-12 [2]
##  ggthemes               4.2.0      2019-05-13 [1]
##  globals                0.12.5     2019-12-07 [2]
##  glue                   1.3.2      2020-03-12 [1]
##  gplots                 3.0.1.2    2020-01-11 [2]
##  gridExtra              2.3        2017-09-09 [2]
##  gtable                 0.3.0      2019-03-25 [2]
##  gtools                 3.8.1      2018-06-26 [2]
##  haven                  2.3.1      2020-06-01 [1]
##  hexbin                 1.28.0     2019-11-11 [2]
##  hms                    0.5.3      2020-01-08 [1]
##  htmltools              0.4.0      2019-10-04 [2]
##  htmlwidgets            1.5.1      2019-10-08 [2]
##  httr                   1.4.2      2020-07-20 [1]
##  ica                    1.0-2      2018-05-24 [2]
##  igraph                 1.2.5      2020-03-19 [1]
##  IRanges                2.20.2     2020-01-13 [2]
##  irlba                  2.3.3      2019-02-05 [2]
##  jsonlite               1.7.1      2020-09-07 [1]
##  KernSmooth             2.23-16    2019-10-15 [3]
##  knitr                  1.26       2019-11-12 [2]
##  labeling               0.3        2014-08-23 [2]
##  laeken                 0.5.1      2020-02-05 [1]
##  lattice                0.20-38    2018-11-04 [3]
##  lazyeval               0.2.2      2019-03-15 [2]
##  leiden                 0.3.1      2019-07-23 [2]
##  lifecycle              0.2.0      2020-03-06 [1]
##  listenv                0.8.0      2019-12-05 [2]
##  lmtest                 0.9-37     2019-04-30 [2]
##  lsei                   1.2-0      2017-10-23 [2]
##  lubridate              1.7.9.2    2020-11-13 [1]
##  magrittr             * 2.0.1      2020-11-17 [1]
##  MASS                   7.3-51.5   2019-12-20 [3]
##  Matrix                 1.2-18     2019-11-27 [3]
##  matrixStats            0.56.0     2020-03-13 [1]
##  memoise                1.1.0      2017-04-21 [2]
##  metap                  1.2        2019-12-08 [2]
##  mnormt                 1.5-5      2016-10-15 [2]
##  modelr                 0.1.8      2020-05-19 [1]
##  multcomp               1.4-12     2020-01-10 [2]
##  multtest               2.42.0     2019-10-29 [2]
##  munsell                0.5.0      2018-06-12 [2]
##  mutoss                 0.1-12     2017-12-04 [2]
##  mvtnorm                1.0-12     2020-01-09 [2]
##  nlme                   3.1-143    2019-12-10 [3]
##  nnet                   7.3-12     2016-02-02 [3]
##  npsurv                 0.4-0      2017-10-14 [2]
##  numDeriv               2016.8-1.1 2019-06-06 [2]
##  openxlsx               4.1.5      2020-05-06 [1]
##  pbapply                1.4-2      2019-08-31 [2]
##  pcaMethods             1.78.0     2019-10-29 [2]
##  pillar                 1.4.6      2020-07-10 [1]
##  pkgbuild               1.0.6      2019-10-09 [2]
##  pkgconfig              2.0.3      2019-09-22 [1]
##  pkgload                1.0.2      2018-10-29 [2]
##  plotly                 4.9.1      2019-11-07 [2]
##  plotrix                3.7-7      2019-12-05 [2]
##  plyr                   1.8.5      2019-12-10 [2]
##  png                    0.1-7      2013-12-03 [2]
##  prettyunits            1.1.1      2020-01-24 [1]
##  processx               3.4.2      2020-02-09 [1]
##  progeny              * 1.11.3     2020-10-22 [1]
##  proxy                  0.4-24     2020-04-25 [1]
##  ps                     1.3.2      2020-02-13 [1]
##  purrr                * 0.3.4      2020-04-17 [1]
##  R.methodsS3            1.7.1      2016-02-16 [2]
##  R.oo                   1.23.0     2019-11-03 [2]
##  R.utils                2.9.2      2019-12-08 [2]
##  R6                     2.4.1      2019-11-12 [1]
##  ranger                 0.12.1     2020-01-10 [1]
##  RANN                   2.6.1      2019-01-08 [2]
##  rappdirs               0.3.1      2016-03-28 [2]
##  RColorBrewer           1.1-2      2014-12-07 [2]
##  Rcpp                   1.0.4      2020-03-17 [1]
##  RcppAnnoy              0.0.16     2020-03-08 [1]
##  RcppEigen              0.3.3.7.0  2019-11-16 [2]
##  RcppHNSW               0.2.0      2019-09-20 [2]
##  RcppParallel           4.4.4      2019-09-27 [2]
##  RCurl                  1.98-1.1   2020-01-19 [1]
##  Rdpack                 0.11-1     2019-12-14 [2]
##  readr                * 1.4.0      2020-10-05 [1]
##  readxl               * 1.3.1      2019-03-13 [2]
##  rematch                1.0.1      2016-04-21 [2]
##  remotes                2.1.0      2019-06-24 [2]
##  reprex                 0.3.0      2019-05-16 [2]
##  reshape2               1.4.3      2017-12-11 [2]
##  reticulate             1.14       2019-12-17 [2]
##  rio                    0.5.16     2018-11-26 [1]
##  rlang                  0.4.8      2020-10-08 [1]
##  rmarkdown              2.0        2019-12-12 [2]
##  robustbase             0.93-6     2020-03-23 [1]
##  ROCR                   1.0-7      2015-03-26 [2]
##  rprojroot              1.3-2      2018-01-03 [2]
##  RSpectra               0.16-0     2019-12-01 [2]
##  rstudioapi             0.11       2020-02-07 [1]
##  rsvd                   1.0.3      2020-02-17 [1]
##  Rtsne                  0.15       2018-11-10 [2]
##  rvest                  0.3.6      2020-07-25 [1]
##  S4Vectors              0.24.2     2020-01-13 [2]
##  sandwich               2.5-1      2019-04-06 [2]
##  scales                 1.1.0      2019-11-18 [2]
##  scatterplot3d          0.3-41     2018-03-14 [1]
##  sctransform            0.2.1      2019-12-17 [2]
##  SDMTools               1.1-221.2  2019-11-30 [2]
##  sessioninfo            1.1.1      2018-11-05 [2]
##  Seurat               * 3.1.2      2019-12-12 [2]
##  SingleCellExperiment   1.8.0      2019-10-29 [2]
##  smoother               1.1        2015-04-16 [1]
##  sn                     1.5-4      2019-05-14 [2]
##  sp                     1.4-2      2020-05-20 [1]
##  stringi                1.5.3      2020-09-09 [1]
##  stringr              * 1.4.0      2019-02-10 [1]
##  SummarizedExperiment   1.16.1     2019-12-19 [2]
##  survival               3.1-8      2019-12-03 [3]
##  testthat               2.3.2      2020-03-02 [1]
##  TFisher                0.2.0      2018-03-21 [2]
##  TH.data                1.0-10     2019-01-21 [2]
##  tibble               * 3.0.4      2020-10-12 [1]
##  tidyr                * 1.1.2      2020-08-27 [1]
##  tidyselect             1.1.0      2020-05-11 [1]
##  tidyverse            * 1.3.0      2019-11-21 [2]
##  tsne                   0.1-3      2016-07-15 [2]
##  TTR                    0.23-6     2019-12-15 [1]
##  usethis                1.5.1      2019-07-04 [2]
##  uwot                   0.1.5      2019-12-04 [2]
##  vcd                    1.4-7      2020-04-02 [1]
##  vctrs                  0.3.5      2020-11-17 [1]
##  VIM                    6.0.0      2020-05-08 [1]
##  viridis              * 0.5.1      2018-03-29 [2]
##  viridisLite          * 0.3.0      2018-02-01 [2]
##  withr                  2.3.0      2020-09-22 [1]
##  xfun                   0.12       2020-01-13 [2]
##  xml2                   1.3.2      2020-04-23 [1]
##  xts                    0.12-0     2020-01-19 [1]
##  XVector                0.26.0     2019-10-29 [2]
##  yaml                   2.2.1      2020-02-01 [1]
##  zip                    2.0.4      2019-09-01 [1]
##  zlibbioc               1.32.0     2019-10-29 [2]
##  zoo                    1.8-7      2020-01-10 [2]
##  source                                 
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.3)                         
##  CRAN (R 3.6.2)                         
##  Bioconductor                           
##  Bioconductor                           
##  Bioconductor                           
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  Github (clauswilke/colorblindr@1ac3d4d)
##  R-Forge (R 3.6.2)                      
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.3)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  Bioconductor                           
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  Bioconductor                           
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.3)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.3)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  Bioconductor                           
##  Bioconductor                           
##  Bioconductor                           
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.3)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  Bioconductor                           
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  Bioconductor                           
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  Bioconductor                           
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.3)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  Github (saezlab/progeny@94bea1c)       
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.3)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.3)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  Bioconductor                           
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  Bioconductor                           
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.3)                         
##  CRAN (R 3.6.3)                         
##  Bioconductor                           
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.3)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  Bioconductor                           
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  Bioconductor                           
##  CRAN (R 3.6.2)                         
## 
## [1] /home/uhlitzf/R/lib
## [2] /usr/local/lib/R/site-library
## [3] /usr/local/lib/R/library